########All data analyses were carried out using using R (version 3.2.2) #######
rm(list = ls())
library(foreign)
#install.packages("readstata13")
library(readstata13)
#install.packages("arm")
library(arm)
#install.packages("AER")
library(AER)

dat<-read.dta13("FDI_and_Corruption(ISQ).dta")
attach(dat)


################################################################################
##R Function: IV Regression with One Endogenous Regressor & Interaction Term ###
################################################################################
# x1: Endogenous Regressor
# x2: Exogenous Regressor
# Y: Dependent Variable
# z1: Instrument for x1
# covarmat: A Matrix of Covariates
# outliers: Statistical Outliers

ivreg1<-function(x1,x2,y,z1,covarmat,outliers){
#define dataset
datamat<-na.omit(cbind.data.frame(x1,x2,y,z1,covarmat,outliers))
#drop outliers
datamat<-subset(datamat, datamat$outliers==0)
x1<-datamat[,"x1"]
x2<-datamat[,"x2"]
y<- datamat[,"y"]
#excluded instrument
zmat<-as.matrix(datamat[,4])
#included instruments
xmat<-as.matrix(datamat[,5:(ncol(datamat)-1)])

# first-stage regression
fit1a<-lm(x1~zmat+x2+xmat,x=T)
x1.hat<-fit1a$fitted

Z<-fit1a$x
Zmat<-x2*Z[,-1]

fit1c<-lm(x1*x2~-1+Zmat)
x1x2.hat<-fit1c$fitted

#second-stage regression
fit2<-lm(y~x1.hat+x1x2.hat+x2+xmat,x=TRUE)

#correct SEs of second-stage regression
x.adj<-fit2$x
x.adj[,"x1.hat"]<-x1
x.adj[,"x1x2.hat"]<-x1*x2
resids<-y-x.adj%*%coef(fit2)
resids.sd<-sqrt(sum(resids^2)/(length(y)-ncol(x.adj)))
se.adj<-se.coef(fit2)*resids.sd/sigma.hat(fit2)

##### Function: R-squared & adjusted R-squared
rsquared<-function(y,x.adj,fit){
y.hat<-x.adj%*%coef(fit)
Rsqr<-(sum((y-mean(y))*(y.hat-mean(y.hat))))^2/(sum((y-mean(y))^2)*sum((y.hat-mean(y.hat))^2))
Rsqr.adj<-1-((length(y)-1)*(1-Rsqr)/(length(y)-ncol(x.adj)))
print(cbind(Rsqr,Rsqr.adj),2)
}

# p-values
pvalue<-2*pt(abs(coef(fit2)/se.adj),df=(length(y)-ncol(x.adj)),lower.tail=F)
output<-(cbind(coef(fit2),se.adj,abs(coef(fit2)/se.adj),pvalue))
colnames(output)<-c("Coefs","SEs","|t|", "P>|t|")
print(round(output,2))
rsquared(y,x.adj,fit2)
# no. of obs.
nrow(datamat)
}

################################################################################
# R Function: IV Regression with Two Endogenous Regressors & Interaction Term ##
################################################################################
# x1: Endogenous Regressor
# x2: Endogenous Regressor
# Y: Dependent Variable
# z1: Instrument for x1
# z2: Instrument for x2
# covarmat: A Matrix of Covariates
# outliers: Statistical Outliers

ivreg2<-function(x1,x2,y,z1,z2,covarmat,outliers){
datamat<-na.omit(cbind.data.frame(x1,x2,y,z1,z2,covarmat,outliers))
#drop outliers
datamat<-subset(datamat, datamat$outliers==0)
x1<-datamat[,"x1"]
x2<-datamat[,"x2"]
y<- datamat[,"y"]
zmat<-cbind(datamat$z1,datamat$z2)
xmat<-as.matrix(datamat[,6:(ncol(datamat)-1)])
# first stage regression
fit1a<-lm(x1~zmat+xmat,x=T)
x1.hat<-fit1a$fitted

fit1b<- lm(x2~ zmat+xmat,x=T)
x2.hat<-fit1b$fitted

Z<-fit1b$x
Zmat<-matrix(NA,nrow(xmat),ncol(Z)^2)

for (i in 1:ncol(Z)){
Zmat[,(ncol(Z)*(i-1)+1):(ncol(Z)*i)]<-Z[,i]*Z
}

fit1c<-lm(x1*x2~Zmat)
x1x2.hat<-fit1c$fitted

# second stage regression
fit2<-lm(y~x1.hat+x2.hat+x1x2.hat+xmat,x=TRUE)

x.adj<-fit2$x
x.adj[,"x1.hat"]<-x1
x.adj[,"x2.hat"]<-x2
x.adj[,"x1x2.hat"]<-x1*x2

resids<-y-x.adj%*%coef(fit2)
resids.sd<-sqrt(sum(resids^2)/(length(y)-ncol(x.adj)))
se.adj<-se.coef(fit2)*resids.sd/sigma.hat(fit2)

##### Function: R-squared & adjusted R-squared
rsquared<-function(y,x.adj,fit){
y.hat<-x.adj%*%coef(fit)
Rsqr<-(sum((y-mean(y))*(y.hat-mean(y.hat))))^2/(sum((y-mean(y))^2)*sum((y.hat-mean(y.hat))^2))
Rsqr.adj<-1-((length(y)-1)*(1-Rsqr)/(length(y)-ncol(x.adj)))
print(cbind(Rsqr,Rsqr.adj),2)
}

# p-values
pvalue<-2*pt(abs(coef(fit2)/se.adj),df=(length(y)-ncol(x.adj)),lower.tail=F)
output<-(cbind(coef(fit2),se.adj,abs(coef(fit2)/se.adj),pvalue))
colnames(output)<-c("Coefs","SEs","|t|", "P>|t|")
print(round(output,2))
rsquared(y,x.adj,fit2)
# no. of obs.
nrow(datamat)
}

################################################################################
############################ Table 1: OLS (CPI Sample) #########################
################################################################################
#Covariates
covarmat1<-cbind(democracy,resend,lopenk,avelf,catho80,muslim80,protmg80)
#Covariates with Two Legal Origin Variables
covarmat2<-cbind(democracy,resend,lopenk,avelf,catho80,muslim80,protmg80,
legor_uk, legor_fr)

#Model 1
fit1<-lm(cpi0004~rfdistockcap+clrgdpl+covarmat1)
summary(fit1)

# Model 2
fit2<-lm(cpi0004~rfdistockcap+clrgdpl+covarmat2)
summary(fit2)

# Model 3
fit3<-lm(cpi0004~rfdistockcap+clrgdpl+rfdistockcap:clrgdpl+covarmat2)
summary(fit3)

################################################################################
########################## Table 2: 2SLS (CPI Sample)###########################
################################################################################

#outliers
outliers<-ifelse(country=="Ireland"|country=="Singapore",1,0)

#Model 4
fit4<-ivreg(cpi0004~rfdistockcap+clrgdpl+democracy+resend+lopenk+avelf+catho80+
     muslim80+protmg80|distance+clrgdpl+democracy+resend+lopenk+avelf
     +catho80+muslim80+protmg80,data=dat[outliers==0,])
summary(fit4)

# Model 5
fit5<-ivreg(cpi0004~rfdistockcap+clrgdpl+democracy+resend+lopenk+avelf+catho80+
    muslim80+protmg80+legor_uk+legor_fr|distance+clrgdpl+democracy+resend+lopenk
    +avelf+catho80+muslim80+protmg80+legor_uk+legor_fr,data=dat[outliers==0,])
summary(fit5)

# Model 6
# Run Function "ivreg1" (Lines 25--75 )
ivreg1(rfdistockcap,clrgdpl,cpi0004,distance,covarmat1,outliers)

# Model 7
# Run Runction "ivreg2" (Lines 88--142 )
ivreg2(rfdistockcap,clrgdpl,cpi0004,distance,lat_abst,covarmat1,outliers)

#Model 8
ivreg2(rfdistockcap,clrgdpl,cpi0004,distance,lat_abst,covarmat2,outliers)

################################################################################
############## Figure 1: Plot of Marginal Effects (Model 8)  ###################              
################################################################################

###################### Marginal Effect Plot Function   #########################
# x1: Endogenous Regressor
# x2: Endogenous Regressor
# Y: Dependent Variable
# z1: Instrument for x1
# z2: Instrument for x2
# covarmat: A Matrix of Covariates
# outliers: Statistical Outliers

meffect.plot<-function(x1,x2,y,z1,z2,covarmat,outliers){
datamat<-na.omit(cbind.data.frame(x1,x2,y,z1,z2,covarmat,outliers))
datamat<-subset(datamat, datamat$outliers==0)
x1<-datamat[,"x1"]
x2<-datamat[,"x2"]
y<- datamat[,"y"]
zmat<-as.matrix(datamat[,4:5])
xmat<-as.matrix(datamat[,6:(ncol(datamat)-1)])

fit1a<-lm(x1~zmat+xmat,x=T)
x1.hat<-fit1a$fitted

fit1b<- lm(x2~ zmat+xmat,x=T)
x2.hat<-fit1b$fitted

Z<-fit1b$x
Zmat<-matrix(NA,nrow(xmat),ncol(Z)^2)

for (i in 1:ncol(Z)){
Zmat[,(ncol(Z)*(i-1)+1):(ncol(Z)*i)]<-Z[,i]*Z
}

fit1c<-lm(x1*x2~Zmat)
summary(fit1c)
x1x2.hat<-fit1c$fitted

fit2<-lm(y~x1.hat+x1x2.hat+x2.hat+xmat,x=TRUE)

x.adj<-fit2$x
x.adj[,"x1.hat"]<-x1
x.adj[,"x2.hat"]<-x2
x.adj[,"x1x2.hat"]<-x1*x2

resids<-y-x.adj%*%coef(fit2)
resids.sd<-sqrt(sum(resids^2)/(length(y)-ncol(x.adj)))

summ<-summary(fit2)
x<-fit2$x
sigma.sq<-sum(resids^2)/(length(y)-ncol(x.adj))
vcv<-sigma.sq*solve(t(x)%*%x)

nsim<-1000
N<-1000
y.fake<-matrix(NA,N,nsim)

gdp.sim<-seq(-2.3,2.1, length.out=N)
coef.matrix<-mvrnorm(nsim,summ$coef[1:nrow(summ$coef)],vcv)
xmat1<-as.matrix(cbind(1, gdp.sim))
coef.m<-cbind(coef.matrix[,2],coef.matrix[,3])

for (i in 1:N){
y.fake[i,]<-coef.m%*%xmat1[i,]
}

ci1<-matrix(NA,N,2)
ci2<-matrix(NA,N,2)
y.fake.mean<-rep(NA,N)
for (i in 1:N){
ci1[i,]<-quantile(y.fake[i,],c(0.025,0.975))
ci2[i,]<-quantile(y.fake[i,],c(0.05,0.95))
y.fake.mean[i]<-mean(y.fake[i,])
}

plot(gdp.sim, y.fake.mean, type="n",  cex.main=1.1,
    ylim=c(min(ci1[,1])-0.02,max(ci1[,2])+0.02), cex.lab=1,
    xlim=c(min(gdp.sim),max(gdp.sim)),
    yaxs="i", xlab="Log of Real GDP per Capita (Mean-Centered)", 
    ylab="Marginal Effects", cex.lab=0.8, cex.axis=.8,mgp = c(2,.8,0)
    )
grid(col = "gray80")
lines(gdp.sim, ci1[,1], lty=2,col="gray88")
lines(gdp.sim, ci1[,2], lty=2,col="gray88")

box()

frame.y1 <- c(ci1[,1],rev(ci1[,2]))
frame.x1 <- c(gdp.sim, rev(gdp.sim))
polygon(frame.x1, frame.y1, density=-1, col="gray88", border=NA)
abline(0,0)
lines(gdp.sim, y.fake.mean,lty=1,lwd=2,col="black")
}

################################ Figure 1 ######################################
png(file="Figure_1.png",width=6.5,height=4,units="in",res=900)
par(mfrow=c(1,1),mar=c(3.5,3.5,1,1))
meffect.plot(rfdistockcap,clrgdpl,cpi0004,distance,lat_abst,covarmat2,outliers)
dev.off()

################################################################################
detach(dat)
rm(dat)
rm(list = ls())
################################################################################
################### Models in Table 2 (2SLS): WBES #############################
################################################################################

dat<-read.dta13("FDI_and_Corruption_WBES(ISQ).dta")
attach(dat)

#Covariates
covarmat1<-cbind(polity2,resend, lopen,elf85,cath80,musl80,prot80)
 #Covariates with Two Legal Origin Variables
covarmat2<-cbind(polity2,resend, lopen,elf85,cath80,musl80,prot80,
                 leg_british ,leg_french)

outliers<-ifelse(countryname=="Kazakhstan"|countryname=="Lebanon"|
                   countryname=="Lesotho"|countryname=="Sweden"|
                   countryname=="Trinidad and Tobago",1,0)

# Run Function "ivreg2" (Lines 88--142 )
#Model 9
ivreg2(rfdistockcap,clrgdpcap,logbrib_inci,distance,latitude,covarmat1,outliers)

#Model 10
ivreg2(rfdistockcap,clrgdpcap,logbrib_inci,distance,latitude,covarmat2,outliers)

#Model 11
ivreg2(rfdistockcap,clrgdpcap,logbrib_dep,distance,latitude,covarmat1,outliers)

#Model 12
ivreg2(rfdistockcap,clrgdpcap,logbrib_dep,distance,latitude,covarmat2,outliers)

################################################################################
######## Figure 2: Marginal Effects of FDI Stock per Capita on Bribery #########
################################################################################

# Run Function "meffect.plot" (lines 208--288)
png(file="Figure_2.png",width=6.5,height=7,units="in",res=900)
par(mfrow=c(2,1),mar=c(3.5,3.5,1,1))
meffect.plot(rfdistockcap,clrgdpcap,logbrib_inci,distance,latitude,
             covarmat2,outliers)
legend(0.5,1.3,"Bribery Incidence",lty=1,lwd=2,col="black",bty="n",cex=.8,pt.lwd=.8)

meffect.plot(rfdistockcap,clrgdpcap,logbrib_dep,distance,latitude,
             covarmat2,outliers)
legend(0.5,1.45,"Bribery Depth",lty=1,lwd=2,col="black",bty="n",cex=.8,pt.lwd=.8)
dev.off()
################################################################################

